clear all
use Ma_EDCC_2020_Data.dta

********************************************************************************
** Tab 3: Test the wedge in shadow farm and off-farm wages
********************************************************************************

********************************************
** Zero observation transformed
********************************************

********************************************
** Estimation the home production function
********************************************

** Total home prod costs
gen hh_hpc = hh_cropc + hh_mechc + hh_hirelbc + hh_lsc  

** Total home production days
gen hh_hpd = 0
qui foreach i of numlist 1/512{
	egen temp1=sum(hp_d) if hhno==`i' & yr==2015 
	egen temp2=sum(hp_d) if hhno==`i' & yr==2016 
	egen temp3=max(temp1) if hhno==`i' & yr==2015 
	egen temp4=max(temp2) if hhno==`i' & yr==2016 
	
	replace hh_hpd=temp3 if hhno==`i' & yr==2015 
	replace hh_hpd=temp4 if hhno==`i' & yr==2016
	drop temp*
	}

** Chnage unit of value
qui foreach i in hh_hpv hh_hpc hh_lsv{
	replace `i'=`i'*1000
	}
	
** Define large ranches, the top 2.5%
gen hh_lrch=0
replace hh_lrch=1 if hh_lsv>100000

** Transform zeros by adding one
qui foreach i in hh_hpv  {
	replace `i'= `i'+ 1 if hh_hpd!=0
	}
	
** Only add one to HH that produce sth
qui foreach i in hh_hpc hh_el hh_hpd {
	replace `i'= `i'+ 1 if hh_hpd!=0
	}	

** Log-transformation of the Cobb-Dougls form
qui foreach i in hh_hpv hh_hpc hh_hpd hh_el {
	gen ln_`i'=log(`i')
	}	
	
** Estimate home prod fn: log-log
** No vil FE

** OLS
** No vil FE
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs "
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local fe "yr16" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

/** Robustness check: frontier function
* save the coefficients
* the log of the square of the root mean square error and the value .1 in a matrix called b0

qui local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs "
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local fe "yr16" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

matrix b1=e(b),ln(e(rmse)^2), 0.1
matrix list b1

** Use stochastic frontier fn
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs"
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local fe "yr16" 

frontier ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, from(b1, copy) vce(cluster vil_id) **/

** Predicted home prod value
predict hh_hpv_hat, xb 
replace hh_hpv_hat = exp(hh_hpv_hat)
replace hh_hpv_hat= round(hh_hpv_hat, 1)

** Deduct the one added earlier
replace hh_hpv = hh_hpv - 1 if hh_hpd!=0
replace hh_hpv_hat = hh_hpv_hat -1 if hh_hpd!=0

** Summarize observed and predicted home prod value
sum hh_hpv hh_hpv_hat if pers_id==1 & hh_lf==0 & hh_lrch==0 & hh_hpd!=0
corr hh_hpv hh_hpv_hat if pers_id==1 & hh_lf==0 & hh_lrch==0 & hh_hpd!=0

*********************************************
** Production contribution of labor input
*********************************************

** The coef of home prod labor
gen hpd_b =  0.0746

** Estimate shadow on-farm productivity of labor
gen hp_swgmn = (hh_hpv_hat*hpd_b/hh_hpd)*30

** Round the estimated wage rate
replace hp_swgmn = round(hp_swgmn, 1)

** Missing info
replace hp_swgmn = . if (male==.|hp_d==0)

** Change unit of observed nonfarm wage rate
replace nf_wgmn=nf_wgmn*1000

** Round the observed wage rate
replace nf_wgmn = round(nf_wgmn,1)

** Summarize the two wage rates
sum nf_wgmn hp_swgmn if ptf_if==1 & hh_lf==0 & hh_lrch==0 & nf_wgmn!=.

** The labor works on other farms, while the corresponding value is unknown
list hh_id pers_id yr male hh_lb hh_hpv hh_hpv_hat nf_wgmn ///
if nf_wgmn!=. & ptf_if==1 & hp_swgmn==.& hh_lf==0 & hh_lrch==0

**
sum hp_swgmn nf_wgmn if ptf_if==1 & hh_lf==0 & hh_lrch==0 & nf_wgmn!=. & hp_swgmn!=.

************************************************
** Test wage wedges: FGLS
************************************************

**
keep if ptf_if==1 & hh_lf==0 & hh_lrch==0 & hp_swgmn!=. & nf_wgmn!=.

**
gen lhp_swgmn =log(hp_swgmn)
gen lnf_wgmn = log(nf_wgmn)

** SE of estimated shadow wage
egen  se_swgmn = sd(hp_swgmn) 
egen  se_lswgmn = sd(lhp_swgmn) 

********************************
** IV: linear, multiple IVs
********************************

**
qui ivregress 2sls hp_swgmn (nf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0

predict resid, residuals
** generating residuals
gen residsq = resid*resid

** sum of residual squares
qui sum residsq
local sumresidsq =r(sum)

** generating Omega
gen omegasq = se_swgmn^2

**
set matsize 10000
mkmat omegasq, matrix(omegasq)

**
matrix G= diag(omegasq)

**
qui sum omegasq
local sumomegasq =r(sum)

** generating matrices
gen ones=1
mkmat nf_wgmn yr16 vil1-vil13 ones, matrix(X)
local N= rowsof(X)
local K=colsof(X)
matrix S =  inv(X'*X)*X'*G*X

** computing sigma and weights
local tr_S = trace(S)
local sigmahatsq = (`sumresidsq' - `sumomegasq' + `tr_S')/(`N' - `K')

**
gen wt = 1/(sqrt(omegasq+`sigmahatsq'))

** second step regression
ivregress 2sls hp_swgmn (nf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0 [pweight=wt], first

**
display "sigmahat " sqrt(`sigmahatsq')

**
qui sum se_swgmn
display "omega(average)" r(mean)

drop resid-wt

****************************
** IV: linear, multiple IVs
** W/ vil dummies
****************************

**
qui ivregress 2sls hp_swgmn (nf_wgmn = male age edu birthpl vil_tocd) yr16 vil1-vil13 ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0

predict resid, residuals
** generating residuals
gen residsq = resid*resid

** sum of residual squares
qui sum residsq
local sumresidsq =r(sum)

** generating Omega
gen omegasq = se_swgmn^2

**
set matsize 10000
mkmat omegasq, matrix(omegasq)

**
matrix G= diag(omegasq)

**
qui sum omegasq
local sumomegasq =r(sum)

** generating matrices
gen ones=1
mkmat nf_wgmn yr16 vil1-vil13 ones, matrix(X)
local N= rowsof(X)
local K=colsof(X)
matrix S =  inv(X'*X)*X'*G*X

** computing sigma and weights
local tr_S = trace(S)
local sigmahatsq = (`sumresidsq' - `sumomegasq' + `tr_S')/(`N' - `K')

**
gen wt = 1/(sqrt(omegasq+`sigmahatsq'))

** second step regression
ivregress 2sls hp_swgmn (nf_wgmn = male age edu birthpl vil_tocd) yr16 vil1-vil13  ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0 [pweight=wt], first

**
display "sigmahat " sqrt(`sigmahatsq')

**
qui sum se_swgmn
display "omega(average)" r(mean)

drop resid-wt

**********************
** IV: linear, 1 IV
**********************

**
qui ivregress 2sls hp_swgmn (nf_wgmn = age) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0

predict resid, residuals
** generating residuals
gen residsq = resid*resid

** sum of residual squares
qui sum residsq
local sumresidsq =r(sum)

** generating Omega
gen omegasq = se_swgmn^2

**
set matsize 10000
mkmat omegasq, matrix(omegasq)

**
matrix G= diag(omegasq)

**
qui sum omegasq
local sumomegasq =r(sum)

** generating matrices
gen ones=1
mkmat nf_wgmn yr16 vil1-vil13 ones, matrix(X)
local N= rowsof(X)
local K=colsof(X)
matrix S =  inv(X'*X)*X'*G*X

** computing sigma and weights
local tr_S = trace(S)
local sigmahatsq = (`sumresidsq' - `sumomegasq' + `tr_S')/(`N' - `K')

**
gen wt = 1/(sqrt(omegasq+`sigmahatsq'))

** second step regression
ivregress 2sls hp_swgmn (nf_wgmn = age) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0 [pweight=wt], first

**
display "sigmahat " sqrt(`sigmahatsq')

**
qui sum se_swgmn
display "omega(average)" r(mean)

drop resid-wt

*********************************
** IV: log-log, multiple IVs
*********************************

**
qui ivregress 2sls lhp_swgmn (lnf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0

predict resid, residuals
** generating residuals
gen residsq = resid*resid

** sum of residual squares
qui sum residsq
local sumresidsq =r(sum)

** generating Omega
gen omegasq = se_lswgmn^2

**
set matsize 10000
mkmat omegasq, matrix(omegasq)

**
matrix G= diag(omegasq)

**
qui sum omegasq
local sumomegasq =r(sum)

** generating matrices
gen ones=1
mkmat nf_wgmn yr16 vil1-vil13 ones, matrix(X)
local N= rowsof(X)
local K=colsof(X)
matrix S =  inv(X'*X)*X'*G*X

** computing sigma and weights
local tr_S = trace(S)
local sigmahatsq = (`sumresidsq' - `sumomegasq' + `tr_S')/(`N' - `K')

**
gen wt = 1/(sqrt(omegasq+`sigmahatsq'))

** second step regression

ivregress 2sls lhp_swgmn (lnf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0 [pweight=wt], first

**
display "sigmahat " sqrt(`sigmahatsq')

**
qui sum se_swgmn
display "omega(average)" r(mean)

drop resid-wt

*****************************
** IV: log-log, multiple IVs
** W/ vil dummies
*****************************

**
qui ivregress 2sls lhp_swgmn (lnf_wgmn = male age edu birthpl vil_tocd) yr16 vil1-vil13  ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0

predict resid, residuals
** generating residuals
gen residsq = resid*resid

** sum of residual squares
qui sum residsq
local sumresidsq =r(sum)

** generating Omega
gen omegasq = se_lswgmn^2

**
set matsize 10000
mkmat omegasq, matrix(omegasq)

**
matrix G= diag(omegasq)

**
qui sum omegasq
local sumomegasq =r(sum)

** generating matrices
gen ones=1
mkmat nf_wgmn yr16 vil1-vil13 ones, matrix(X)
local N= rowsof(X)
local K=colsof(X)
matrix S =  inv(X'*X)*X'*G*X

** computing sigma and weights
local tr_S = trace(S)
local sigmahatsq = (`sumresidsq' - `sumomegasq' + `tr_S')/(`N' - `K')

**
gen wt = 1/(sqrt(omegasq+`sigmahatsq'))

** second step regression
ivregress 2sls lhp_swgmn (lnf_wgmn = male age edu birthpl vil_tocd) yr16 vil1-vil13  ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0 [pweight=wt], first

**
display "sigmahat " sqrt(`sigmahatsq')

**
qui sum se_swgmn
display "omega(average)" r(mean)

drop resid-wt

************************************************
** Test wage wedges: Bootstrap
************************************************

/** multiple IVs: linear, multiple IVs
bootstrap, reps(500): ivregress 2sls hp_swgmn (nf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

** multiple IVs: log-log, multiple IVs
bootstrap, reps(500): ivregress 2sls lhp_swgmn (lnf_wgmn = male age edu birthpl vil_tocd) ///
if ptf_if==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)**/


********************************************************************************
** Tab A3: Estimation the home production function
********************************************************************************

clear all
use Ma_EDCC_2020_Data.dta

********************************************
** Zero observation transformed
********************************************

** Total home prod costs
gen hh_hpc = hh_cropc + hh_mechc + hh_hirelbc + hh_lsc  

** Total home production days
gen hh_hpd = 0
qui foreach i of numlist 1/512{
	egen temp1=sum(hp_d) if hhno==`i' & yr==2015 
	egen temp2=sum(hp_d) if hhno==`i' & yr==2016 
	egen temp3=max(temp1) if hhno==`i' & yr==2015 
	egen temp4=max(temp2) if hhno==`i' & yr==2016 
	
	replace hh_hpd=temp3 if hhno==`i' & yr==2015 
	replace hh_hpd=temp4 if hhno==`i' & yr==2016
	drop temp*
	}

** Chnage unit of value
qui foreach i in hh_hpv hh_hpc hh_lsv{
	replace `i'=`i'*1000
	}
	
** Define large ranches, the top 2.5%
gen hh_lrch=0
replace hh_lrch=1 if hh_lsv>100000

** Transform zeros by adding one
qui foreach i in hh_hpv  {
	replace `i'= `i'+ 1 if hh_hpd!=0
	}
	
** Only add one to HH that produce sth
qui foreach i in hh_hpc hh_el hh_hpd {
	replace `i'= `i'+ 1 if hh_hpd!=0
	}	

** Log-transformation of the Cobb-Dougls form
qui foreach i in hh_hpv hh_hpc hh_hpd hh_el {
	gen ln_`i'=log(`i')
	}	
	
** Column 1
** Vil dummies
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs "
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local fe "yr16 vil1-vil13" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

est sto m, title(tabA3 column 1)

esttab m, keep (ln_hh_el ln_hh_hpd ln_hh_hpc _cons) ///
cells(b(star fmt(2)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
legend label varlabels(_cons Constant) ///
stats(N r2 df_r)

** Column 2
** No vil dummies
** Additional controls
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs "
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu hh_tvif hh_acif hh_carif hh_pns"
local fe "yr16 vil_tocnt vil_tocd" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)	

est sto m, title(tabA3 column 2)

esttab m, keep (ln_hh_el ln_hh_hpd ln_hh_hpc _cons) ///
cells(b(star fmt(2)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
legend label varlabels(_cons Constant) ///
stats(N r2 df_r)
	
** Column 4
** No vil dummies
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs "
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local fe "yr16" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

est sto m, title(tabA3 column 4)

esttab m, keep (ln_hh_el ln_hh_hpd ln_hh_hpc _cons) ///
cells(b(star fmt(2)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
legend label varlabels(_cons Constant) ///
stats(N r2 df_r)


********************************************
** Zero observation NOT transformed
********************************************

clear all
use Ma_EDCC_2020_Data.dta

*****************************************
** Estimation home production function
*****************************************

** Total home prod costs
gen hh_hpc = hh_cropc + hh_mechc + hh_hirelbc + hh_lsc  

** Total home production days
gen hh_hpd = 0
qui foreach i of numlist 1/512{
	egen temp1=sum(hp_d) if hhno==`i' & yr==2015 
	egen temp2=sum(hp_d) if hhno==`i' & yr==2016 
	egen temp3=max(temp1) if hhno==`i' & yr==2015 
	egen temp4=max(temp2) if hhno==`i' & yr==2016 
	
	replace hh_hpd=temp3 if hhno==`i' & yr==2015 
	replace hh_hpd=temp4 if hhno==`i' & yr==2016
	drop temp*
	}

** Chnage unit of value
qui foreach i in hh_hpv hh_hpc hh_lsv{
	replace `i'=`i'*1000
	}
	
** Define large ranches, the top 2.5%
gen hh_lrch=0
replace hh_lrch=1 if hh_lsv>100000

** Log-transformation of the Cobb-Dougls form
qui foreach i in hh_hpv hh_hpc hh_hpd hh_el {
	gen ln_`i'=log(`i')
	}	
	
** Column 3
**  No vil dummies
local hh_v "ln_hh_el ln_hh_hpd ln_hh_hpc"
local hh_v2 "hh_perncrop hh_lc hh_ldreall hh_ldexpp hh_elcontr hh_subs"
local hh_v3 "genr hh_lb hh_hsld hh_mechv hh_headm hh_headage hh_headedu"
local vil_fe "yr16" 

qui reg ln_hh_hpv ///
`hh_v' `hh_v2' `hh_v3' `vil_fe' ///
if pers_id==1 & hh_lf==0 & hh_lrch==0, vce(cluster vil_id)

est sto m, title(tabA3 column 3)

esttab m, keep (ln_hh_el ln_hh_hpd ln_hh_hpc _cons) ///
cells(b(star fmt(2)) se(par fmt(3))) star(+ 0.132 * 0.10 ** 0.05 *** 0.01) ///
legend label varlabels(_cons Constant) ///
stats(N r2 df_r)
